* Load the caseload master file, replacing whatever data are currently in memory.
use "D:\caseload_study\caseloadmaster.dta", clear

********************************************************************************
* Do-file for Caseload Study of Texas Attorneys with Geoff Burkhart, George Naufal,
*  and Heather Caspers // last updated 8/8/2018 by ntd
********************************************************************************
********************************************************************************
********************************************************************************
* attorney cases, collapsing on all county-observations; this generates the 
*  attorney's full or statewide caseload 
********************************************************************************

	* Statewide caseload by case type: every "...casespaid" count is summed over
	*  all rows that share the same bar number and year. The two lists below are
	*  paired by position (k-th source variable -> k-th attorney-level total).
	local paidvars juvenilecasespaid capitalmurdercasespaid adultfelonycasespaid ///
		adultmisdemeanorcasespaid juvenileappealscasespaid felonyappealscasespaid ///
		misdemeanorappealscasespaid
	local attyvars attyjuvcases attycapmurdercases attyadultfelcases ///
		attyadultmisdcases attyjuvappcases attyfelappcases attymisdappcases
	local ntypes : word count `paidvars'
	forvalues k = 1/`ntypes' {
		local paid : word `k' of `paidvars'
		local atty : word `k' of `attyvars'
		by bar year, sort: egen `atty' = total(`paid')
	}
	* capital and non-capital felonies are pooled into one felony count
	egen attyallfelcases = rowtotal(attycapmurdercases attyadultfelcases)

	* row-level (within-county) count of every non-juvenile case type
	egen nonjuvcases = rowtotal(capitalmurdercasespaid adultfelonycasespaid ///
		adultmisdemeanorcasespaid felonyappealscasespaid misdemeanorappealscasespaid)
	* the same count summed over all of the attorney's rows in that year
	by bar year, sort: egen njcases_all = total(nonjuvcases)

********************************************************************************
* FTEs based on caseload
********************************************************************************

	* Divisors that turn a case count into a fraction of one FTE
	*  (presumably annual full-time caseload standards -- TODO confirm source)
	local std_juv    230
	local std_fel    128
	local std_misd   226
	local std_appeal 31.2

	* FTE by case type, computed from the attorney's statewide counts
	by bar year, sort: generate FTEjuv     = attyjuvcases / `std_juv'
	by bar year, sort: generate FTEallfel  = attyallfelcases / `std_fel'
	by bar year, sort: generate FTEmisd    = attyadultmisdcases / `std_misd'
	by bar year, sort: generate FTEjuvapp  = attyjuvappcases / `std_appeal'
	by bar year, sort: generate FTEmisdapp = attymisdappcases / `std_appeal'
	by bar year, sort: generate FTEfelapp  = attyfelappcases / `std_appeal'

	* total non-juvenile FTE: felony + misdemeanor + both adult appeal types
	*  (the juvenile and juvenile-appeal FTEs are left out of this sum)
	egen FTE_row_nojuv = rowtotal(FTEallfel FTEmisd FTEmisdapp FTEfelapp)
	* one value per attorney-year, copied to all of that attorney's rows
	by bar year, sort: egen FTEnojuv = min(FTE_row_nojuv)
	
********************************************************************************
* all attorneys in given year
********************************************************************************
	
	* attysnojuv = 1 when the attorney's non-juvenile FTE is non-missing and
	*  nonzero; it is left missing otherwise (the flag is never set to 0)
	generate attysnojuv = 1 if FTEnojuv != . & FTEnojuv != 0
	* attorney_marker numbers the rows inside each bar-year; keeping only
	*  marker 1 counts every attorney once per year
	by bar year, sort: generate attorney_marker = _n
	* yearly count of flagged attorneys, filled in on the marker-1 rows only
	by year, sort: egen all_attysnojuv = total(attysnojuv) if attorney_marker == 1

********************************************************************************
* PD estimates
********************************************************************************

* attorneys
	* Counting unique PD attorneys needs its own grouping key: an attorney may
	*  have PD rows AND private-assigned-counsel rows in the same year, so "_n"
	*  within bar-year does not reliably land on a PD row. bar1 exists only on
	*  PD rows, so grouping on it numbers the PD rows separately.
	* bar1 is stored as double: the default float type cannot hold integers
	*  above 16,777,216 exactly, so large bar numbers would be rounded and
	*  different attorneys could fall into the same bar1 group.
	gen double bar1 = bar + 1
	replace bar1 = . if pubdefender != 1
	* counter used in the summation below: row number within bar1-year,
	*  defined on PD rows only
	bysort bar1 year: gen pdatty_counter = _n if pubdefender == 1
	* keep only the first PD row per attorney-year, and drop rows with no bar number
	replace pdatty_counter = . if pdatty_counter != 1
	replace pdatty_counter = . if bar == .
	* pdatty_counter now marks unique PD attorneys; sum them by year
	bysort year: egen tx_number_pdattys_ = total(pdatty_counter)
	bysort year: egen tx_number_pdattys = min(tx_number_pdattys_)
	
* cases
	* statewide caseload of the PD attorneys counted above, filled in on the
	*  rows where pdatty_counter==1
	bysort year: egen tx_PDcases = total(njcases_all) if pdatty_counter == 1

********************************************************************************
* all cases in given year
********************************************************************************

	* yearly sum of attorney caseloads, one row per attorney (marker-1 rows only)
	by year, sort: egen tx_allcases = total(njcases_all) if attorney_marker == 1
	
********************************************************************************
* avg cases in year
********************************************************************************

*********************
* all cases statewide
*********************	

	* statewide average: total cases divided by number of attorneys
		by year, sort: generate avg_caseload = tx_allcases / all_attysnojuv

*********************
* Public defender
*********************

	* statewide PD average: PD cases divided by number of unique PD attorneys
		by year, sort: generate tx_pdavgcases = tx_PDcases / tx_number_pdattys
		* spread the yearly value to every row of that year
		by year, sort: egen tx_PDavgcases = min(tx_pdavgcases)
		
		

	* attys non dallas / bowie
		* pd_n marks the first row of each attorney-county-year on PD rows;
		*  rows in countyid 57 and 19 (Dallas, Bowie) are excluded
		* NOTE(review): if an attorney has PD and non-PD rows in the same
		*  county-year, which row gets _n==1 depends on sort order -- confirm
		bysort bar county year: gen pd_n = _n if pubdefender==1
		replace pd_n = . if pd_n>1
		replace pd_n = . if countyid==57 
		replace pd_n = . if countyid==19
		* number of PD attorneys per county-year, copied to all rows
		bysort county year: egen county_pdattys_ = total(pd_n)
		bysort county year: egen county_pdattys = min(county_pdattys_)
		
		* n==1 picks one row per county-year so county totals are summed once
		bysort county year: gen n = _n
		
		bysort year: egen year_pdattys = total(county_pdattys) if n==1
		
		* PD cases per county-year, copied to all rows of the county-year
		bysort county year: egen pd_cases_ = total(nonjuvcases) if pd_n==1
		bysort county year: egen pd_cases = min(pd_cases_)
		* blank out Dallas and Bowie; this has to come after pd_cases exists
		*  (the replace used to run before the variable was created)
		replace pd_cases = . if countyid==57 
		replace pd_cases = . if countyid==19
		
		bysort year: egen year_pd_cases = total(pd_cases) if n==1
		
		* average PD caseload outside Dallas and Bowie
		bysort year: gen pdavg_nodalbo = year_pd_cases / year_pdattys
		
	* Dallas (countyid 57) and Bowie (countyid 19): PD attorneys, PD cases and
	*  the resulting average caseload. Each pair is "<variable suffix> <countyid>".
		foreach pair in "dal 57" "bow 19" {
			local tag : word 1 of `pair'
			local cid : word 2 of `pair'
			* attorneys: unique-PD markers summed over the county's rows
			by year, sort: egen pdattys_`tag'_ = total(pdatty_counter) if countyid==`cid'
			by year, sort: egen pdattys_`tag' = min(pdattys_`tag'_)
			* cases: non-juvenile cases on the county's PD rows
			by year, sort: egen pdcases_`tag'_ = total(nonjuvcases) if countyid==`cid' & pubdefender==1
			by year, sort: egen pdcases_`tag' = min(pdcases_`tag'_)
			* average caseload, then spread to every row of the year
			by year, sort: generate avgcases_`tag'_ = pdcases_`tag' / pdattys_`tag'
			by year, sort: egen avgcases_`tag' = min(avgcases_`tag'_)
		}


*********************		
* Non-PD 
*********************

	* statewide non-PD average, stored in tx_avgNPDcases
		* non-PD attorneys per year, counted on marker-1 rows
		by year, sort: egen tx_allattys_nopub_ = total(attysnojuv) if attorney_marker==1 & pubdefender!=1
		* spread to every row of the year
		by year, sort: egen tx_allattys_nopub = min(tx_allattys_nopub_)
		* non-PD case total per year, on the same marker-1 rows
		by year, sort: egen NPDcases_tx = total(njcases_all) if pubdefender!=1 & attorney_marker==1
		* cases per attorney
		by year, sort: generate avgNPDcases_tx = NPDcases_tx / tx_allattys_nopub
		* spread to every row of the year
		by year, sort: egen tx_avgNPDcases = min(avgNPDcases_tx)
	
	* same calculation within county-size class -> look up code in original
		* size_marker keeps one row per attorney, size class and year
		by bar countysize year, sort: generate size_marker = _n
		replace size_marker = . if size_marker>1
		* non-PD attorneys per size class and year
		by countysize year, sort: egen size_allattys_nopub = total(attysnojuv) if size_marker==1 & pubdefender!=1
		* non-PD case total per size class and year
		by countysize year, sort: egen NPDcases_countysize = total(njcases_all) if pubdefender!=1 & size_marker==1
		* cases per attorney
		by countysize year, sort: generate avgNPDcases_countysize = NPDcases_countysize / size_allattys_nopub
		* spread to every row of the size class and year
		by countysize year, sort: egen size_avgNPDcases = min(avgNPDcases_countysize)

********************************************************************************
* # of counties in which atty takes cases
********************************************************************************

	* statewide: number the attorney's rows within the year, then take the
	*  largest row number as the count of rows (counties) for that attorney-year
	by bar year, sort: generate atty_n = _n
	by bar year, sort: egen counties_served = max(atty_n)
	* by county size (run interactively when needed):
	* mean counties_served, over(countysize)

********************************************************************************
* total FTEs at county-level 
********************************************************************************

	* Within-county FTEs: built from the row's own case counts only
	*  (divisors are the same ones used for the statewide FTEs)
	local std_fel    128
	local std_misd   226
	local std_appeal 31.2
	egen allfelpaid = rowtotal(adultfelonycasespaid capitalmurdercasespaid)
	by county year, sort: generate cFTEallfel  = allfelpaid / `std_fel'
	by county year, sort: generate cFTEmisd    = adultmisdemeanorcasespaid / `std_misd'
	by county year, sort: generate cFTEmisdapp = misdemeanorappealscasespaid / `std_appeal'
	by county year, sort: generate cFTEfelapp  = felonyappealscasespaid / `std_appeal'
	* row-level FTE from the within-county caseload
	egen cFTE = rowtotal(cFTEallfel cFTEmisd cFTEmisdapp cFTEfelapp)
	
	* county average FTE based on within-county cases only
		* FTEs summed over the county-year
		by county year, sort: egen sum_countyFTE = total(cFTE)
		* flagged attorneys summed over the county-year
		by county year, sort: egen allattys = total(attysnojuv)
		* FTE per attorney
		by county year, sort: generate countyFTE_within = sum_countyFTE / allattys
		* spread to every row of the county-year
		by county year, sort: egen avgFTE_withincounty = min(countyFTE_within)
	
	* county average FTE based on the attorneys' statewide cases
		* statewide FTEs summed over the county-year
		by county year, sort: egen sum_statewideFTE = total(FTEnojuv)
		* FTE per attorney
		by county year, sort: generate countyFTE_across = sum_statewideFTE / allattys
		* spread to every row of the county-year
		by county year, sort: egen avgFTE_acrosscounty = min(countyFTE_across)		

********************************************************************************
* attys over 1.0 FTE
********************************************************************************
		
	* cut-off used to flag a workload above 1.0 FTE
	local over_cut 1.0000001
	* flag based on within-county FTE only
	generate c_overFTE = 1 if cFTE>=`over_cut'
	* flag based on statewide (across-county) FTE
	generate a_overFTE = 1 if FTEnojuv>=`over_cut'
	
	* number of flagged rows per county-year
		* within-county definition
		by county year, sort: egen ctotal_overFTE = total(c_overFTE)
		* statewide definition
		by county year, sort: egen atotal_overFTE = total(a_overFTE)

	* share of the county's attorneys that are flagged
		* within-county definition
		by county year, sort: generate prop_countyover = ctotal_overFTE / allattys
		* statewide definition
		by county year, sort: generate prop_attyover = atotal_overFTE / allattys

********************************************************************************
* % of cases represented by attorney over 1.0 FTE
********************************************************************************

	* cases held by flagged attorneys (one row per attorney), as a share of
	*  all cases in the year, then spread to every row of the year
	by year, sort: egen overFTE_cases = total(njcases_all) if attorney_marker==1 & a_overFTE==1
	by year, sort: generate propcases_over_ = overFTE_cases / tx_allcases
	by year, sort: egen propcases_over = min(propcases_over_)

********************************************************************************
* real vs perceived FTEs
********************************************************************************

* reported effort: adulttimepercent summed over the attorney's rows in the year
by bar year, sort: egen rFTE = total(adulttimepercent)
* percent -> FTE scale
replace rFTE = rFTE/100
* rows with no reported percentage carry no reported FTE
replace rFTE = . if adulttimepercent==.
* calculated minus reported FTE, only where a percentage was reported
by bar year, sort: generate FTEdiff = FTEnojuv-rFTE if adulttimepercent!=.
* copy used for the histograms
generate FTEdiff_hist = FTEdiff
* drop differences outside [-5, 5] from the plotting copy
replace FTEdiff_hist = . if FTEdiff>5
replace FTEdiff_hist = . if FTEdiff<-5
replace FTEdiff = . if adulttimepercent==.
* plotting copy restricted to attorneys whose calculated FTE is at least 1
generate FTEover1 = FTEdiff_hist if FTEnojuv>=1

********************************************************************************
* cash-money
********************************************************************************

* total payments to the attorney across all rows in the year
by bar year, sort: egen cashmoney = total(totalpaid)

********************************************************************************
* Save with the full path: no working directory has been set at this point, and
*  the figure code further down reloads "D:\caseload_study\recoded_master.dta".
saveold "D:\caseload_study\recoded_master.dta", replace 
********************************************************************************

********************************************************************************
********************************************************************************
***************************    graphing for paper   ****************************
********************************************************************************
********************************************************************************
* Table 1: all no juv cases annually; all no juv attys, annually
********************************************************************************

* start using: "D:\caseload_study\recoded_master.dta"

* mean tx_allcases, over(year)
* mean all_attysnojuv, over(year)

*			tx_allcases	all_attysnojuv	
*2014		424422		5719
*2015		432854		5804
*2016		422662		5592
*2017		438234		5544

* in figure form: 

* bar chart of total non-juvenile cases per year
mean tx_allcases, over(year)
estimates store yearcases
coefplot yearcases, ///
	plotregion(fcolor(white)) ///
	ylabel(0 "0" 100000 "100,000" 200000 "200,000" 300000 "300,000" 400000 "400,000" 500000 "500,000" , ///
		labcolor(black) grid glcolor(gs14) glpattern(solid)) ///
	xlabel(, labcolor(black) grid glcolor(gs14) glpattern(solid)) ///
	yscale(r() lcolor(black)) ///
	xscale(r(0.5 5) lcolor(black)) ///
	barwidth(0.75) norecycle legend(off) nokey noci recast(bar) vertical aspect(1)

* bar chart of the number of attorneys per year
mean all_attysnojuv, over(year)
estimates store allattys
coefplot allattys, ///
	plotregion(fcolor(white)) ///
	ylabel(0 2000 4000 6000, labcolor(black) grid glcolor(gs14) glpattern(solid)) ///
	xlabel(, labcolor(black) grid glcolor(gs14) glpattern(solid)) ///
	yscale(r() lcolor(black)) ///
	xscale(r(0.5 5) lcolor(black)) ///
	barwidth(0.75) norecycle legend(off) nokey noci recast(bar) vertical aspect(1)

********************************************************************************
* Figure 1: # counties served by attys (sum) / avg # counties by county size
********************************************************************************
* all figure files from here on are written to the figures folder
cd "D:\caseload_study\figures"

* panel A: distribution of the number of counties served
histogram counties_served, percent discrete ///
	plotregion(fcolor(white)) ///
	ylabel(0 "0%" 10 "10%" 20 "20%" 30 "30%" 40 "40%" 50 "50%", ///
		labcolor(black) grid glcolor(gs14) glpattern(solid)) ///
	xlabel(, labcolor(black) grid glcolor(gs14) glpattern(solid)) ///
	yscale(r(-1 50) lcolor(black)) ///
	xscale(r(0.5 5) lcolor(black)) ///
	barwidth(0.75) legend(off) aspect(1) ///
	ytitle(" ") xtitle("Total counties") ///
	title("A. No. counties served by attorneys") ///
	saving(figure1a, replace)

* panel B: mean number of counties served, by county size
mean counties_served, over(countysize)
estimates store counties_served
coefplot counties_served, ///
	plotregion(fcolor(white)) ///
	ylabel(0 1 2 3 4 5, labcolor(black) grid glcolor(gs14) glpattern(solid)) ///
	xlabel(1 `" "Small" "counties" "' 2 `" "Medium" "counties" "' 3 `" "Large" "counties" "' , ///
		labcolor(black) grid glcolor(gs14) glpattern(solid)) ///
	yscale(r(0 4) lcolor(black)) ///
	xscale(r(0.5 3) lcolor(black)) ///
	barwidth(0.5) norecycle recast(bar) vertical aspect(.9) blcolor(black) bfcolor(gs12) ///
	ytitle("# of counties") xtitle(" ") ///
	title("B. Avg counties served by attorney") ///
	saving(figure1b, replace)

* both panels in one figure
graph combine figure1a.gph figure1b.gph, saving(figure1, replace)

********************************************************************************
* Figure 2: average caseload by attorney fo small, med, large // PD (no dallas
*           or bowie), dallas, bowie
********************************************************************************

* yearly means stored under the names used in the plot below
* all attorneys
mean avg_caseload, over(year)
estimates store all
* all public defenders
mean tx_PDavgcases, over(year)
estimates store allpd
* public defenders excluding Dallas and Bowie
mean pdavg_nodalbo, over(year)
estimates store pd_nodalbow
* public defenders, Dallas
mean avgcases_dal, over(year)
estimates store pd_dallas
* public defenders, Bowie
mean avgcases_bow, over(year)
estimates store pd_bowie
* non-public defenders (npd; assigned counsel)
mean tx_avgNPDcases, over(year)
estimates store allnpd
* npd by county size: countysize 1, 2, 3 -> smallnpd, mednpd, largenpd
local sizenames smallnpd mednpd largenpd
forvalues s = 1/3 {
	local nm : word `s' of `sizenames'
	mean size_avgNPDcases if countysize==`s', over(year)
	estimates store `nm'
}

* one panel per stored estimate
coefplot all, bylabel("All attorneys") ///
	|| allpd, bylabel("All PDs") ///
	|| allnpd, bylabel("All Non-PDs") ///
	|| pd_nodalbow, bylabel("PD - no Dallas / Bowie") ///
	|| pd_dallas, bylabel("PD - Dallas") ///
	|| pd_bowie, bylabel("PD - Bowie") ///
	|| smallnpd, bylabel("NPD - small counties") ///
	|| mednpd, bylabel("NPD - med counties") ///
	|| largenpd, bylabel("NPD - large counties") ///
	||, ///
	plotregion(fcolor(white)) ///
	ylabel(0 100 200 300 400 500 600, labcolor(black) grid glcolor(gs14) glpattern(solid) gstyle(noextend)) ///
	xlabel(, labcolor(black) grid glcolor(gs14) glpattern(solid) gstyle(noextend)) ///
	yscale(r(-1 275) lcolor(black)) ///
	xscale(r(.5 4) lcolor(black)) ///
	barwidth(0.5) norecycle blcolor(black) legend(off) nokey recast(bar) vertical aspect(.75) ///
	ytitle("# of cases") saving(figure2, replace)


********************************************************************************
* Figure 3: FTEs within / FTEs across 
* Note: requires shapefile, and additional coordinates to generate texas map
********************************************************************************
* switch to the mapping folder and load the map data set
cd "D:\caseload_study\mapping"
use "D:\caseload_study\mapping\master_map_data.dta", clear

* map of average FTE from within-county cases
spmap avgFTE_withincounty using c1, id(id) ///
	clmethod(custom) clbreaks(0 .5 1 1.5 3) ///
	fcolor( white black*.18 red*.5 maroon) ///
	legend(label(2 "0.00-0.50") label(3 "0.51-1.00") label(4 "1.01-1.50") label(5 "+1.50")) ///
	legend(title("Avg. FTEs", size(small))) ///
	saving(withinFTE, replace)

* map of average FTE from statewide cases
spmap avgFTE_acrosscounty using c1, id(id) ///
	clmethod(custom) clbreaks(0 .5 1 1.5 3) ///
	fcolor( white black*.18 red*.5 maroon) ///
	legend(label(2 "0.00-0.50") label(3 "0.51-1.00") label(4 "1.01-1.50") label(5 "+1.50")) ///
	legend(title("Avg. FTEs", size(small))) ///
	saving(acrossFTE, replace)

* both maps in one figure
graph combine withinFTE.gph acrossFTE.gph, saving(figure3, replace)

********************************************************************************
* Figure 4: proportion of attys over 1 FTE within / across counties served 
* Note: requires shapefile, and additional coordinates to generate texas map
********************************************************************************

* Both maps are combined into one figure and carry the same legend labels, so
*  they use one fill palette (white, red*.25, red*.5, maroon*.8); the second
*  map previously used red*.6 for its third class.

*within-county caseloads - proportion that is over 1 FTE
spmap prop_countyover using c1, id(id) ///
	clmethod(custom) clbreaks(0 0.01 .1 .2 1) ///
	fcolor(white red*.25 red*.5 maroon*.8) ///
	title("Proportion of attorneys over 1.0 FTE, accounting" " only for attorneys' appointed cases w/in given county", size(medsmall)) ///
	legend(label(2 "No attorneys over 1 FTE") label(3 "0.1% to 9.9%") label(4 "10% to 19%") label(5 "20%+") title("Proportion of attorneys over 1 FTE", size(medsmall))) ///
	saving(county1, replace)

*across-county caseloads - proportion that is over 1 FTE
spmap prop_attyover using c1, id(id) ///
	clmethod(custom) clbreaks(0 0.01 .1 .2 1) ///
	fcolor(white red*.25 red*.5 maroon*.8) ///
	title("Proportion of attorneys over 1.0 FTE, accounting" "for attorneys' appointed cases across all counties served", size(medsmall)) ///
	legend(label(2 "No attorneys over 1 FTE") label(3 "0.1% to 9.9%") label(4 "10% to 19%") label(5 "20%+") title("Proportion of attorneys over 1 FTE", size(medsmall))) ///
	saving(atty1, replace)

*combining graphs
gr combine county1.gph atty1.gph, saving(figure4, replace)

********************************************************************************
* Figure 5: proportion of cases by attorney over 1.0 FTE
********************************************************************************
* reload the recoded attorney data, then write figures to the figures folder
cd "D:\caseload_study\"
use "D:\caseload_study\recoded_master.dta", clear
cd "D:\caseload_study\figures"

* check and adjust label properties

* yearly share of cases held by attorneys over 1.0 FTE, drawn as bars
mean propcases_over, over(year)
estimates store propcases_over
coefplot propcases_over, ///
	plotregion(fcolor(white)) ///
	ylabel(0 "0%" .1 "10%" .2 "20%" .3 "30%" .4 "40%" .5 "50%" , ///
		labcolor(black) grid glcolor(gs14) glpattern(solid) gstyle(noextend)) ///
	xlabel(, labcolor(black) grid glcolor(gs14) glpattern(solid) gstyle(noextend)) ///
	yscale(r(-.01 .5) lcolor(black)) ///
	xscale(r(1 4) lcolor(black)) ///
	barwidth(0.5) norecycle legend(off) nokey noci recast(bar) vertical aspect(1) blcolor(black) bfcolor(gs12) ///
	title("Percent of cases represented by attorney over 1.0 FTE") ///
	saving(figure5, replace)


********************************************************************************
* Figure 6: difference b/w self-reported and calculated FTEs
********************************************************************************

* recoded_master.dta already holds rFTE, FTEdiff, FTEdiff_hist and FTEover1
*  (they are created before the file is saved), so generating them again
*  would stop with "already defined". Drop any existing copies first; capture
*  keeps this working when a variable is not present.
foreach v in rFTE FTEdiff FTEdiff_hist FTEover1 {
	capture drop `v'
}

*generating reported effort; first sum across attorney's statewide caseload
bysort bar year: egen rFTE = total(adulttimepercent)
*divide by 100 to put it into FTE scale
replace rFTE = rFTE/100
*purging missing
replace rFTE = . if adulttimepercent==.
*generating difference measure (calculated FTE minus reported FTE)
bysort bar year: gen FTEdiff = FTEnojuv-rFTE if adulttimepercent!=.
*generating graphing variable
gen FTEdiff_hist = FTEdiff
*trim outliers: differences outside [-5, 5] are left out of the plots
replace FTEdiff_hist = . if FTEdiff>5
replace FTEdiff_hist = . if FTEdiff<-5
replace FTEdiff = . if adulttimepercent==.
*plotting variable restricted to attorneys whose calculated FTE is at least 1
gen FTEover1 = FTEdiff_hist if FTEnojuv>=1

* panel A: all attorneys
hist FTEdiff_hist, percent ///
plotregion(fcolor(white))  ///
ylabel(0 "0%" 5 "5%" 10 "10%" 15 "15%" 20 "20%", labcolor(black) grid glcolor(gs14) glpattern(solid)) ///
xlabel(,labcolor(black) grid glcolor(gs14) glpattern(solid)) ///
yscale(r(-.5 21) lcolor(black)) xscale(r() lcolor(black)) ///
legend(off) aspect(1) ///
ytitle(" ") xtitle("Actual FTEs - Reported FTEs") title("A. All attorneys") saving(figure6a, replace)


* panel B: attorneys with a calculated FTE of at least 1
hist FTEover1, percent bin(15) ///
plotregion(fcolor(white))  ///
ylabel(0 "0%" 10 "10%" 20 "20%" 30 "30%" , labcolor(black) grid glcolor(gs14) glpattern(solid)) ///
xlabel(,labcolor(black) grid glcolor(gs14) glpattern(solid)) ///
yscale(r(-.5 21) lcolor(black)) xscale(r() lcolor(black)) ///
legend(off) aspect(1) ///
ytitle(" ") xtitle("Actual FTEs - Reported FTEs") title("B. Attorneys over 1.0 FTE") saving(figure6b, replace)

* both panels in one figure
gr combine figure6a.gph figure6b.gph, saving(figure6, replace)

********************************************************************************
* Figure 7: compensation
********************************************************************************

* distribution of total yearly payments per attorney (not saved to disk)
histogram cashmoney, percent ///
	plotregion(fcolor(white)) ///
	ylabel(0 "0%" 10 "10%" 20 "20%" 30 "30%" 40 "40%", ///
		labcolor(black) grid glcolor(gs14) glpattern(solid)) ///
	xlabel(0 "$0" 100000 "$100,000" 200000 "$200,000" 300000 "$300,000" 400000 "$400,000" 500000 "$500,000" 600000 "$600,000" 700000 "$700,000" 800000 "$800,000", ///
		labcolor(black) grid glcolor(gs14) glpattern(solid)) ///
	yscale(r(-.5 21) lcolor(black)) ///
	xscale(r() lcolor(black)) ///
	legend(off) aspect(1) ///
	xtitle(" ") ytitle(" ")
